
****  Goal: Creating Main DID dataset
***** 1. Create wide panel with data from Wave 1 and 2

*********************************
*	Setting Paths		        *
*********************************

* Set working directory to "JOP Replication files" folder on your computer 

* Start from the merged file (ew_merge.dta) and cross-tabulate the wave-1 and
* wave-2 husband-absence dummies; the recorded output is pasted below.
clear
use "DATA FILES TO SHARE/TEMP_FILES/ew_merge.dta"

tabulate w1_abshusband_dummy w2_abshusband_dummy

/*
w1_abshusb |  w2_abshusband_dummy
 and_dummy |         0          1 |     Total
-----------+----------------------+----------
         0 |    24,030        868 |    24,898 
         1 |       280        301 |       581 
-----------+----------------------+----------
     Total |    24,310      1,169 |    25,479 

*/



* ---- Wave-1 extract ---------------------------------------------------------
* Keep the identifiers, the two husband-absence dummies and the x-prefixed
* items, then strip the x prefix so the variable names line up with the
* un-prefixed names kept for the wave-2 extract further down.
* The result is saved with year = 0.
keep xIDHNR xNHNR STATEID DISTID PSUID HHID2005 HHSPLITID2005 xNR1  xNR4 xNR5 /// 
xNR6 xNR7 xNR8 xNR9 xNR10 xNR11 xNR12 xNR13 xPERSONID_NR xNRMPRO3 xNRMPRO4 xNRMPRO5 xNRMPRO6 ///
xNRMPRO7 xPERSONID xCASEID xIDHH xIDPSU xSTATEID2 xDISTNAME xDIST01 xURBAN xMETRO6 xSWEIGHT ///
xCOPC xGROUPS8 xHHASSETS xHHED2 xHHED5ADULT xHHED5F xHHED5M xEW3 xEW4 xEW5 xEW6 /// 
xEW7 xEW8 xEW9 xGRcook_A xGRcook_B xGRcook_C xGRcook_D xGRcook_E xGRcook_F ///
xGRcook_G xGRlux_A xGRlux_B xGRlux_C xGRlux_D xGRlux_E xGRlux_F xGRlux_G ///
xGRchild_A xGRchild_B xGRchild_C xGRchild_D xGRchild_E xGRchild_F xGRchild_G ///
xGR4A xGR4B xGR4C xGR4D xGR4E xGR4F xGR4G xGRmarr_A xGRmarr_B xGRmarr_C xGRmarr_D ///
xGRmarr_E xGRmarr_F xGRmarr_G xGRhc_A xGRhc_B xGRhc_C xGRhc_D xGRhc_E xGRhc_F ///
xGRrel_A xGRrel_B xGRrel_C xGRrel_D xGRrel_E xGRrel_F xGRshop_A xGRshop_B xGRshop_C ///
xGRshop_D xGRshop_E xGRshop_F xGRmandi_A xGRmandi_B xGRmandi_C xGRmandi_D xGR_hh_meal ///
xGR_hh_cash xGR_hh_ac xGR_hh_acname xGR_hh_prop xpol_disc xpol_hhworker xpol_immu ///
xpol_icds xmerge_nr_ew_w1 PERSONID2005 HHID HHSPLITID PERSONID HHWAVES PWAVES SURVEY ///
w1_abshusband_dummy w2_abshusband_dummy xGRghunghat xGRnatalfam  xGRwifebeat_A ///
 xGRwifebeat_B xGRwifebeat_C xGRwifebeat_D xGRwifebeat_E xGRwidows xGRharass  xGRgirlsold_A ///
 xGRgirlsold_B xGRgirlsold_C xGRgirlsold_D xcaste_marr xINCOME

** xNR2 had to be left out of the keep list above.
** NOTE(review): the reason is not recorded here - confirm and document it.

** xPERSONID duplicates PERSONID2005, so it is not needed.
** Dropping it is also required: PERSONID is already in the data, so the
** prefix-stripping rename below would fail on a name clash if xPERSONID stayed.
drop xPERSONID 
** Rename every x-prefixed variable to the same name without the x.
rename x* *

** Wave indicator: 0 marks the rows of this extract (the wave-2 extract uses 1).
gen year = 0 


*** Give the respondent-characteristic items descriptive names.
*** The wave-2 extract renames its own item numbers to these same names.

rename (EW4 EW5 EW6 EW7 EW8 EW9)(EW_relhh EW_Age EW_Date EW_edu EW_child EW_health)

** The doubled ".dta.dta" extension is matched by the later -append using-
** statement; change both together or not at all.
save "DATA FILES TO SHARE/TEMP_FILES/ew_did_w1.dta.dta", replace


* ---- Wave-2 extract ---------------------------------------------------------
* Reload the merged file and keep the identifiers, the two husband-absence
* dummies and the un-prefixed items.
clear
use "DATA FILES TO SHARE/TEMP_FILES/ew_merge.dta"

keep xIDHNR xNHNR STATEID DISTID PSUID HHID2005 HHSPLITID2005 xmerge_nr_ew_w1 PERSONID2005 HHID /// 
HHSPLITID PERSONID HHWAVES PWAVES SURVEY NNR NR0 NR1 NR4 NR5 NR6 NR7 NR8 NR9 NR10 NR11 ///
NR12 NR13A NR13B IDPSU IDHH IDPERSON EW5 EW6 EW7Y EW8 EW9 EW10 EW11 GRcook_A GRcook_B  ///
GRcook_C GRcook_D GRcook_E GRcook_F GRcook_G GRlux_A GRlux_B GRlux_C GRlux_D GRlux_E ///
GRlux_F GRlux_G GRchild_A GRchild_B GRchild_C GRchild_D GRchild_E GRchild_F GRchild_G ///
GR4A GR4B GR4C GR4D GR4E GR4F GR4G GRmarr_A GRmarr_B GRmarr_C GRmarr_D GRmarr_E ///
GRmarr_F GRmarr_G GRhc_A GRhc_B GRhc_C GRhc_D GRhc_E GRhc_F GRrel_A GRrel_B GRrel_C ///
GRrel_D GRrel_E GRrel_F GRshop_A GRshop_B GRshop_C GRshop_D GRshop_E GRshop_F ///
pol_mem_mm pol_mem_shg pol_mem_cred pol_mem_porg pol_disc GRmandi_A GRmandi_B ///
GRmandi_C GRmandi_D GR_hh_meal GR_hh_cash GR_hh_ac GR_hh_acname GR_hh_prop pol_meet ///
emp_wage emp_nrega emp_nregacurr emp_say emp_unem emp_unemp2 pol_hhworker ///
pol_immu pol_icds w1_abshusband_dummy w2_abshusband_dummy GROUPS8 caste ///
GRghunghat GRnatalfam GRnatalfam_they GRnatalfam_phone  GRwifebeat_A GRwifebeat_E ///
GRwifebeat_B GRwifebeat_C GRwifebeat_F GRwifebeat_D GRwidows GRharass  GRgirlsold_A ///
GRgirlsold_B GRgirlsold_C GRgirlsold_D caste_marr INCOME

** NR2 had to be left out of the keep list above.
** NOTE(review): the reason is not recorded here - confirm and document it.

** Only xIDHNR, xNHNR and xmerge_nr_ew_w1 carry the x prefix in this extract;
** strip it so they match the names in the wave-1 file.
rename x* *

** Wave indicator: 1 marks the rows of this extract.
** No observations are dropped here. Women whose husbands were not migrants
** in round one are flagged later through did_sample rather than filtered.
gen year = 1

*** The mobility items changed in the second wave: GRhc_A, GRrel_A and GRshop_A
*** gained a new category, "Must inform", coded 1. Round 1 recorded that answer
*** as "No", so it is folded into 0 here to restore the round-1 coding:
***   2 -> 1, every other observed answer -> 0.
*** An unanswered (missing) item stays missing. Generating a 0 first and then
*** replacing on == 2 would silently count every missing answer as "No".
*** Each item is rebuilt as a new variable and renamed back, so the recoded
*** variables end up last in the dataset and carry no old value label.
foreach item of varlist GRhc_A GRshop_A GRrel_A {
	gen `item'_new = (`item' == 2) if !missing(`item')
	drop `item'
	rename `item'_new `item'
}

* Give the wave-2 respondent-characteristic items the descriptive names that
* the wave-1 extract also uses, then store the wave-2 file.
rename EW5  EW_relhh
rename EW6  EW_Age
rename EW7Y EW_Date
rename EW8  EW_edu
rename EW9  EW_child
rename EW10 EW_health

save "DATA FILES TO SHARE/TEMP_FILES/ew_did_w2.dta", replace

* Band EW_edu into four levels: 0 = value of 0 or 1, 1 = 2 to 5, 2 = 6 to 12,
* 3 = above 12.
* Stata orders missing values above every number, so without the -if- guard a
* missing EW_edu satisfies "EW_edu > 12" and lands in the top band. Negative
* codes, which this script later blanks out as missing, would land in band 0.
* Both cases now give a missing edu_level instead.
* NOTE(review): the lowest cut is "> 1", so a value of 1 falls in band 0 -
* confirm this is intended.
* NOTE(review): edu_level is created before the wave-1 rows are appended, so
* only wave-2 rows receive a value - confirm this is intended.
gen edu_level = cond(EW_edu > 12, 3, cond(EW_edu > 5, 2, cond(EW_edu > 1, 1, 0))) ///
	if EW_edu >= 0 & !missing(EW_edu)

* Stack the wave-1 rows under the wave-2 rows (two rows per woman).
* -force- lets the append proceed when a variable is string in one file and
* numeric in the other; the values coming from the appended file are then
* set to missing for that variable.
append using "DATA FILES TO SHARE/TEMP_FILES/ew_did_w1.dta.dta", force

* Four-way migration history built from the two husband-absence dummies.
* Any combination not listed explicitly (including a missing dummy) falls
* through to 0.
gen mig_status = cond(w1_abshusband_dummy == 1 & w2_abshusband_dummy == 0, 1, ///
	cond(w1_abshusband_dummy == 1 & w2_abshusband_dummy == 1, 2, ///
	cond(w1_abshusband_dummy == 0 & w2_abshusband_dummy == 1, 3, 0)))

label define mig_status ///
	0 "always non-migrant" ///
	1 "wav 1 migrant only" ///
	2 "always migrant" ///
	3 "wave 2 migrant only"
label values mig_status mig_status
 
 
 
* Zero-padded string pieces shared by the three identifiers built below.
* The macros hold expression text; they are expanded inside each -gen-.
local psu_part string(STATEID,"%02.0f") + string(DISTID,"%02.0f") + string(PSUID,"%02.0f")
local hh_part  string(HHID2005,"%02.0f") + string(HHSPLITID2005,"%02.0f")
local per_part string(PERSONID2005,"%02.0f")

** vill_id: state + district + PSU codes, as a string and as a number.
** z numbers the distinct values; -summarize- shows how many there are.
gen str6 vill_id = `psu_part'
egen z = group(vill_id)
summarize z
destring vill_id, generate(vill_id_num)

** Person UID from the 2005 identifiers (PSU piece + household piece + person).
** p and s number the distinct string and numeric ids respectively.
gen str12 puid_using2005 = `psu_part' + `hh_part' + `per_part'
egen p = group(puid_using2005)
summarize p
destring puid_using2005, generate(puid)
egen s = group(puid)
summarize s

/*
Recorded output of the last summarize:

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
           s |     50,958       12740    7355.226          1      25479

There are no duplicates, and two records per person.
*/

** Household UID from the 2005 identifiers (PSU piece + household piece).
** Since there is only one woman per household, the number of unique
** households is similar to the number of individuals.
gen str10 hhuid_using2005 = `psu_part' + `hh_part'
egen y = group(hhuid_using2005)
summarize y
destring hhuid_using2005, generate(hhuid_num)
 
 
*** Hand over to the companion do-file that creates the indices and cleans up
*** variables.
do "CODE/DATA/CODE/ihds_changesforrg_didindex.do"

** mig_husb collapses migration status into one variable: it is 1 when the
** dummy belonging to the row's own wave (year 0 -> w1, year 1 -> w2) equals 1,
** and 0 otherwise.
gen mig_husb = (w1_abshusband_dummy == 1 & year == 0) | ///
	(w2_abshusband_dummy == 1 & year == 1)

** did_sample marks the truncated DID sample: the husband was not a migrant
** in wave 1, whatever his wave-2 status (0 or 1).
gen did_sample = w1_abshusband_dummy == 0 & inlist(w2_abshusband_dummy, 0, 1)

** Store the woman-level panel.
save "DATA FILES TO SHARE/TEMP_FILES/main_df.dta", replace


* Load the raw IHDS file 37382-0011 (presumably the household-level data -
* TODO confirm) and attach the woman-level panel to it.
use "DATA FILES TO SHARE/IHDS_RAW/37382-0011-Data.dta", clear

* Wave indicator matching the panel: 1 when SURVEY is 2, otherwise 0.
gen year = (SURVEY == 2)

* State + district + PSU piece shared by both household ids below.
local geo string(STATEID,"%02.0f") + string(DISTID,"%02.0f") + string(PSUID,"%02.0f")

** Household UID built from the 2005 household and split ids.
gen str10 hhuid_using2005 = `geo' + string(HHID2005,"%02.0f") + string(HHSPLITID2005,"%02.0f")
egen y = group(hhuid_using2005)
summarize y
destring hhuid_using2005, generate(hhuid_num)

** Household UID from the pooled file: it uses HHID and HHSPLITID, which
** basically come from the second wave. HHID is padded to three digits here.
gen str11 hhuid = `geo' + string(HHID,"%03.0f") + string(HHSPLITID,"%02.0f")
egen x = group(hhuid)
summarize x

sort STATEID DISTID PSUID HHID HHSPLITID year

* One row per household-wave on each side. For variables present in both
* files (for example y, hhuid_using2005, hhuid_num), matched rows keep the
* values already in memory.
merge 1:1 STATEID DISTID PSUID HHID HHSPLITID year using "DATA FILES TO SHARE/TEMP_FILES/main_df.dta"


** 1220 women did not match the household information; the reason is unknown
** (possibly an error in the source data), so they are dropped.
** Keeping only matched rows also removes household rows that have no woman
** in the panel.
drop if _merge != 3

*** Negative values of the health, child, education and age items are turned
*** into missing values.
foreach v of varlist EW_health EW_child EW_edu EW_Age {
	replace `v' = . if `v' < 0
}

save "DATA FILES TO SHARE/TEMP_FILES/main_ew_hh_df.dta", replace




